import re
import urllib.request as ur

# Script purpose: read a static HTML page, pull out every "main_section"
# block, and list the category heading and course names found in each one.

# These patterns are applied to HTML whose newlines have been removed, so the
# non-greedy ``.*?`` can span content that originally sat on several lines.
SECTION_PATTERN = re.compile(r'<section class="main_section">(.*?)</section>')
CATEGORY_PATTERN = re.compile(r'<h1>(.*?)</h1>')
COURSE_PATTERN = re.compile(r'<span class="course_name">(.*?)</span>')


def _parse_section(section_html):
    """Extract the category name and course names from one section.

    Args:
        section_html: Inner HTML of a single ``main_section`` element.

    Returns:
        A dict with two keys: ``'category_name'`` holds the text of the
        first ``<h1>`` in the section, or ``None`` when the section has no
        ``<h1>``; ``'course_names'`` holds the text of every
        ``course_name`` span as a list (empty when there are none).
    """
    # ``search`` instead of ``findall(...)[0]``: a section without a heading
    # yields ``None`` here rather than raising IndexError and aborting the run.
    heading = CATEGORY_PATTERN.search(section_html)
    return {
        'category_name': heading.group(1) if heading else None,
        'course_names': COURSE_PATTERN.findall(section_html),
    }


# Only the read needs the file handle; parsing happens after it is closed.
with open('static/html/index.html', 'r', encoding='utf-8') as f:
    html = f.read()

# Remove newlines (they are deleted, not replaced by spaces) so that ``.``
# in the patterns above can match across the original line boundaries.
html = html.replace('\n', '')

item_names = SECTION_PATTERN.findall(html)
print(item_names)
print(len(item_names))

data_s = [_parse_section(item_name) for item_name in item_names]

print(data_s)

for data in data_s:
    print(data['category_name'])
    for course_name in data['course_names']:
        print('******', course_name)